Hackathon on EV vehicles¶

In [1]:
import plotly
import pandas as pd 
df =pd.read_csv(r"C:\Users\limit\Downloads\evdataset.csv.csv")# loading the dataset 
df
Out[1]:
VIN (1-10) County City State Postal Code Model Year Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility Electric Range Base MSRP Legislative District DOL Vehicle ID Vehicle Location Electric Utility 2020 Census Tract
0 JTMEB3FV6N Monroe Key West FL 33040 2022 TOYOTA RAV4 PRIME Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 42 0 NaN 198968248 POINT (-81.80023 24.5545) NaN 12087972100
1 1G1RD6E45D Clark Laughlin NV 89029 2013 CHEVROLET VOLT Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 38 0 NaN 5204412 POINT (-114.57245 35.16815) NaN 32003005702
2 JN1AZ0CP8B Yakima Yakima WA 98901 2011 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 73 0 15.0 218972519 POINT (-120.50721 46.60448) PACIFICORP 53077001602
3 1G1FW6S08H Skagit Concrete WA 98237 2017 CHEVROLET BOLT EV Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 238 0 39.0 186750406 POINT (-121.7515 48.53892) PUGET SOUND ENERGY INC 53057951101
4 3FA6P0SU1K Snohomish Everett WA 98201 2019 FORD FUSION Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 26 0 38.0 2006714 POINT (-122.20596 47.97659) PUGET SOUND ENERGY INC 53061041500
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
112629 7SAYGDEF2N King Duvall WA 98019 2022 TESLA MODEL Y Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... 0 0 45.0 217955265 POINT (-121.98609 47.74068) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 53033032401
112630 1N4BZ1CP7K San Juan Friday Harbor WA 98250 2019 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 150 0 40.0 103663227 POINT (-123.01648 48.53448) BONNEVILLE POWER ADMINISTRATION||ORCAS POWER &... 53055960301
112631 1FMCU0KZ4N King Vashon WA 98070 2022 FORD ESCAPE Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 38 0 34.0 193878387 POINT (-122.4573 47.44929) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 53033027702
112632 KNDCD3LD4J King Covington WA 98042 2018 KIA NIRO Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 26 0 47.0 125039043 POINT (-122.09124 47.33778) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 53033032007
112633 YV4BR0CL8N King Covington WA 98042 2022 VOLVO XC90 Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 18 0 47.0 194673692 POINT (-122.09124 47.33778) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 53033032005

112634 rows × 17 columns

In [2]:
#describing the data 
df.describe()
Out[2]:
Postal Code Model Year Electric Range Base MSRP Legislative District DOL Vehicle ID 2020 Census Tract
count 112634.000000 112634.000000 112634.000000 112634.000000 112348.000000 1.126340e+05 1.126340e+05
mean 98156.226850 2019.003365 87.812987 1793.439681 29.805604 1.994567e+08 5.296650e+10
std 2648.733064 2.892364 102.334216 10783.753486 14.700545 9.398427e+07 1.699104e+09
min 1730.000000 1997.000000 0.000000 0.000000 1.000000 4.777000e+03 1.101001e+09
25% 98052.000000 2017.000000 0.000000 0.000000 18.000000 1.484142e+08 5.303301e+10
50% 98119.000000 2020.000000 32.000000 0.000000 34.000000 1.923896e+08 5.303303e+10
75% 98370.000000 2022.000000 208.000000 0.000000 43.000000 2.191899e+08 5.305307e+10
max 99701.000000 2023.000000 337.000000 845000.000000 49.000000 4.792548e+08 5.603300e+10
In [3]:
#checking the data for dtype, non null
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112634 entries, 0 to 112633
Data columns (total 17 columns):
 #   Column                                             Non-Null Count   Dtype  
---  ------                                             --------------   -----  
 0   VIN (1-10)                                         112634 non-null  object 
 1   County                                             112634 non-null  object 
 2   City                                               112634 non-null  object 
 3   State                                              112634 non-null  object 
 4   Postal Code                                        112634 non-null  int64  
 5   Model Year                                         112634 non-null  int64  
 6   Make                                               112634 non-null  object 
 7   Model                                              112614 non-null  object 
 8   Electric Vehicle Type                              112634 non-null  object 
 9   Clean Alternative Fuel Vehicle (CAFV) Eligibility  112634 non-null  object 
 10  Electric Range                                     112634 non-null  int64  
 11  Base MSRP                                          112634 non-null  int64  
 12  Legislative District                               112348 non-null  float64
 13  DOL Vehicle ID                                     112634 non-null  int64  
 14  Vehicle Location                                   112610 non-null  object 
 15  Electric Utility                                   112191 non-null  object 
 16  2020 Census Tract                                  112634 non-null  int64  
dtypes: float64(1), int64(6), object(10)
memory usage: 14.6+ MB
In [4]:
#checking for missing values 
df.isnull().sum()
Out[4]:
VIN (1-10)                                             0
County                                                 0
City                                                   0
State                                                  0
Postal Code                                            0
Model Year                                             0
Make                                                   0
Model                                                 20
Electric Vehicle Type                                  0
Clean Alternative Fuel Vehicle (CAFV) Eligibility      0
Electric Range                                         0
Base MSRP                                              0
Legislative District                                 286
DOL Vehicle ID                                         0
Vehicle Location                                      24
Electric Utility                                     443
2020 Census Tract                                      0
dtype: int64
  • from the above data we have observed that there are missing values in columns(model, legislative district, vehicle location, electric used)
  • we need to fill those missing values with proper data as the columns play a vital role in further analysis
  • we will deal with missing values by vnr method
In [5]:
df["Model"]=df.groupby(["Make"])['Model'].transform(lambda x: x.fillna(x.mode()[0]))
df["Model"].isnull().sum()#checking for the null values in model column 
Out[5]:
0

Filling the null values in Legislative District by the use of Country,City,State,Postal Code Features¶

In [7]:
df["Legislative District"]=df.groupby(["County","City","State","Postal Code"])["Legislative District"].transform(lambda x: x.fillna(x.mode()[0]))
df["Legislative District"].isnull().sum()#checking weather the missing values are filled or not
Filling the null values in Electric Utility with the help of Make,Model,Electric Vehicle Type,Clean Alternative Fuel Vehicle (CAFV) Eligibility Features¶
In [ ]:
df["Electric Utility"]=df.groupby(["Make","Model","Electric Vehicle Type","Clean Alternative Fuel Vehicle (CAFV) Eligibility"])["Electric Utility"].transform(lambda x: x.fillna(x.mode()[0]))
df["Electric Utility"].isnull().sum()#checking for null values in the column Ecelctric Utility 
  • We have 24 values in Vehicle Location which is 0.021% percentage of data.
  • so the missing values can't effect the data
  • So we are trimming the missing value rows
In [ ]:
df.dropna(subset=['Vehicle Location'], inplace=True)
df.reset_index(drop=True,inplace=True)
df
In [ ]:
#checking the data again for any null values 
df.isnull().sum()

as we can see there are no null values in our data¶

Checking for Dupilcates in columns¶

In [ ]:
df[df.duplicated()]
  • as we can see there and no duplicate any values
In [ ]:
#calculating the frequency(no of occurance) for   column called postal code 
df["Postal Code"].value_counts()
In [ ]:
#calculating the frequency for column model year
df["Model Year"].value_counts()
In [ ]:
#calculating the frequency for column legislative district
df["Legislative District"].value_counts()

From above output we can see that Postal Code,Model Year and Legislative District are in numerical but they are not continuous data¶

  • So we are changing the Data type of the further Features
In [ ]:
df["Legislative District"]=df["Legislative District"].astype("str")
df["Postal Code"]=df["Postal Code"].astype("str")
df["Model Year"]=df["Model Year"].astype("str")

plotting the plots¶

In [10]:
#plotting for the model year (column univariant) 
import plotly.express as px
area=df["Model Year"].value_counts().nlargest(n=10)
fig=px.bar(y=area.values,
          x=area.index,
          orientation="v",
           color=area.index,
           
           text=area.values,
          color_discrete_sequence=px.colors.qualitative.Bold)
fig.show()

we can observe that the highest ev vehicle manufacturing happened in the year 2022 and the manufacturing is constantly increasing every year since 2014 so we can predict more manufacturing of ev vehicles in upcoming years¶

In [11]:
#plotting for the column Make 
area=df["Make"].value_counts().nlargest(n=10)
fig=px.bar(y=area.values,
          x=area.index,
          orientation="v",
           color=area.index,
           
           text=area.values,
          color_discrete_sequence=px.colors.qualitative.Bold)
fig.show()

As compared to other ev manufacturing companies Tesla is manufacturing more number of ev vehicles all over the world¶

In [12]:
#plotting a pie chart for electrical vehicle type 
Vechile_type = df['Electric Vehicle Type'].value_counts()
fig = px.pie(Vechile_type, names=Vechile_type.index, values=Vechile_type.values, title='Pie Chart of Electric Vehicle Type')
fig.show()

battery electrical vehicles are more manufacttured than hybrid vehicles there is chance of increasing the manufacturing of plug-in-hybrid vehicle in upcoming years as we can see the hybrif vehicles also constantly increasing year by year¶

In [13]:
#plotting the electrical range 
fig=px.histogram(df,x="Electric Range",nbins=10)
fig.show()
In [14]:
# plotting a box plot to see any outliers in electric range 
fig=px.box(df,x="Electric Range")
fig.show()
From above Graph we can see that there is no any outliers in the Electric Range Feature¶
In [15]:
#plotting Box Plot of Electric Vehicle Type vs. Electric Range
fig_boxplot = px.box(df, x='Electric Vehicle Type', y='Electric Range', title='Box Plot of Electric Vehicle Type vs. Electric Range')
fig_boxplot.show()
  • From Past Diagram we can see there is no any outliers in Electric Range. but when we ploted above whe can see that when we compared Electric Range with Electric Vechile Type.we can see outliers in Plug-in Hybrid Electric Vehicle (PHEV).

Task-2¶

Create a Choropleth to display the number of EV vehicles based on States in USA¶

In [8]:
import plotly.graph_objects as go
def create_ev_choropleth_map(df):
    # Calculate the count of EV vehicles for each state
    ev_count_by_state = df['State'].value_counts().reset_index()
    ev_count_by_state.columns = ['State', 'EV Count']

    # Create the Choropleth map using plotly.graph_objects
    fig_choropleth = go.Figure(data=go.Choropleth(
        locations=ev_count_by_state['State'],
        z=ev_count_by_state['EV Count'],
        locationmode='USA-states',
        colorscale='Viridis',
        colorbar_title='Number of EV Vehicles',
    ))

    # Set the map title and layout
    fig_choropleth.update_layout(
        title_text='Choropleth Map of EV Vehicles by State',
        geo_scope='usa',  # Limit map scope to the USA
    )

    # Return the Choropleth map figure
    return fig_choropleth
# Call the function and display the Choropleth map
fig = create_ev_choropleth_map(df)
fig.show()

Task-3¶

Create a Racing Bar Plot to display the animation of EV Make and its count each year¶

In [ ]:
import pandas as pd

# Converting the 'Model Year' column to datetime type
df['Model Year'] = pd.to_datetime(df['Model Year'], format='%Y')

# Group by 'Model Year' and 'Make' to get the count of each make for each year
df_grouped = df.groupby(['Model Year', 'Make']).size().reset_index(name='Count')
In [ ]:
import bar_chart_race as bcr
# Pivot the DataFrame to have 'Make' as columns and 'Model Year' as index
df_pivot = df_grouped.pivot(index='Model Year', columns='Make', values='Count')

# Fill missing values using forward fill (pad)
df_pivot = df_pivot.fillna(method='pad')

# Create the Racing Bar Plot
bcr.bar_chart_race(
    df=df_pivot,
    filename='ev_make_racing_bar_plot.mp4',  # Output file name for the animation
    orientation='h',  # Horizontal bars
    sort='desc',      # Sort the bars in descending order
    n_bars=10,        # Number of bars to show
    fixed_order=False, # Allow bars to change positions
    title='EV Make Racing Bar Plot by Year',  # Animation title
    label_bars=True,  # Show the value label on each bar
    period_label={'x': 0.99, 'y': 0.25, 'ha': 'right', 'va': 'center'},  # Position of the year label
    period_fmt='%Y',  # Format of the year label
)
In [ ]: